{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "17d7561c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n",
    "# For example, here's several helpful packages to load in \n",
    "\n",
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.cm as cm\n",
    "# import seaborn as sns\n",
    "from wordcloud import WordCloud\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.manifold import TSNE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7e101a3b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "原始数据行数：  (141900, 4)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_name</th>\n",
       "      <th>user_location</th>\n",
       "      <th>date</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ᏉᎥ☻լꂅϮ</td>\n",
       "      <td>astroworld</td>\n",
       "      <td>2020-07-25 12:27:21</td>\n",
       "      <td>smelled the scent hand sanitizers today someon...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Tom Basile 🇺🇸</td>\n",
       "      <td>New York, NY</td>\n",
       "      <td>2020-07-25 12:27:17</td>\n",
       "      <td>hey and wouldn have made more sense have the p...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Time4fisticuffs</td>\n",
       "      <td>Pewee Valley, KY</td>\n",
       "      <td>2020-07-25 12:27:14</td>\n",
       "      <td>trump never once claimed #covid hoax all claim...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ethel mertz</td>\n",
       "      <td>Stuck in the Middle</td>\n",
       "      <td>2020-07-25 12:27:10</td>\n",
       "      <td>the one gift #covid give appreciation for the ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>DIPR-J&amp;K</td>\n",
       "      <td>Jammu and Kashmir</td>\n",
       "      <td>2020-07-25 12:27:08</td>\n",
       "      <td>july medium bulletin novel #coronavirusupdates...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         user_name         user_location                 date  \\\n",
       "0           ᏉᎥ☻լꂅϮ            astroworld  2020-07-25 12:27:21   \n",
       "1    Tom Basile 🇺🇸          New York, NY  2020-07-25 12:27:17   \n",
       "2  Time4fisticuffs      Pewee Valley, KY  2020-07-25 12:27:14   \n",
       "3      ethel mertz  Stuck in the Middle   2020-07-25 12:27:10   \n",
       "4         DIPR-J&K     Jammu and Kashmir  2020-07-25 12:27:08   \n",
       "\n",
       "                                                text  \n",
       "0  smelled the scent hand sanitizers today someon...  \n",
       "1  hey and wouldn have made more sense have the p...  \n",
       "2  trump never once claimed #covid hoax all claim...  \n",
       "3  the one gift #covid give appreciation for the ...  \n",
       "4  july medium bulletin novel #coronavirusupdates...  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('clean.csv', index_col=0)\n",
    "print('原始数据行数： ', df.shape)\n",
    "# df.fillna('', inplace=True)  # 不改变行数，清洗后空白数据保留\n",
    "# df.dropna(inplace=True)  # 改变行数，干掉空白数据\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "38ee00be",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a17f8487",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tfidf shape:  (141900, 20000)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([10.46498964, 10.46498964, 10.09029619, ..., 10.37797826,\n",
       "       10.15483471, 10.56029982])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vectorizer = TfidfVectorizer(stop_words='english',\n",
    "                                ngram_range=(1,2),sublinear_tf=True, use_idf=True,\n",
    "#                              max_df=0.8, ## 去掉高频口水词\n",
    "#                              min_df=0.001, ## 去掉低频罕见词，或者错误拼写的少数派\n",
    "                             max_features=20000\n",
    "                            )\n",
    "X = vectorizer.fit_transform(df['text'])\n",
    "print('tfidf shape: ', X.shape)  # (140101, 1458)\n",
    "vectorizer.idf_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "e05ec922",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "now fitting 2 clusters using  Mini batch K-means algorithm\n",
      "now fitting 4 clusters using  Mini batch K-means algorithm\n",
      "now fitting 6 clusters using  Mini batch K-means algorithm\n",
      "now fitting 8 clusters using  Mini batch K-means algorithm\n",
      "now fitting 10 clusters using  Mini batch K-means algorithm\n",
      "now fitting 12 clusters using  Mini batch K-means algorithm\n",
      "now fitting 14 clusters using  Mini batch K-means algorithm\n",
      "now fitting 16 clusters using  Mini batch K-means algorithm\n",
      "now fitting 18 clusters using  Mini batch K-means algorithm\n",
      "now fitting 20 clusters using  Mini batch K-means algorithm\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAuYAAAFNCAYAAABMqqrjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABLqklEQVR4nO3dd3gVZfrG8e+T0EWKinQFERAQpUkRV4oIKAquAirs6rprW7Drrm2tqD9Y1y66YgNdELGCiiiigIUWehdEhEDoXUog5/n9cSYxiZAESDInyf25rnNl5p1ynjNiuHnPO++YuyMiIiIiIuGKC7sAERERERFRMBcRERERiQkK5iIiIiIiMUDBXEREREQkBiiYi4iIiIjEAAVzEREREZEYoGAuIlIAmdlfzOy7dOtuZqeGWVNuyc3PYmYrzaxTbpxLRCSvKZiLiMSoIFTuMbNd6V4vhl0XpP3DwM3smUztPYL2oTk8z0QzuzZPihQRKWAUzEVEYtvF7l423eumsAtK5yegt5kVS9d2NfBjSPWIiBRoCuYiIoXHhWa2wsw2mdmTZhYHYGZxZvYvM/vFzDaY2VtmVj7YNszM7gyWqwe93f2D9TpmtiX1PAexDpgPdAn2Pw44GxiTficza21mP5jZNjOba2btg/bHgT8ALx7k24BOZrYsOGawmVl2nyXY/udg22Yzu//oLqeISP5SMBcRKTz+CLQAmgE9gL8G7X8JXh2AU4CyQGoIngS0D5bbASuAc9Otf+vukSze8y3gqmD5CmA0sC91o5lVBz4DHgOOA+4CPjCzSu5+P/AtcNNBvg24CDgLOAPoTRD+s/osZtYQeBn4M1ANOB6okUXtIiIxRcFcRCS2fRz0Gqe+rsti30HuvsXdVwHPAlcG7X2Bp919hbvvAu4FrgiGoEwCzgl6xc8F/g20DY5rF2zPykdA+6DX+iqiQT29PwFj3X2su0fcfTyQAFyYzXkHuvu24LN8AzTJwWfpCXzq7pPdfR/wAJDVPypERGKKgrmISGy7xN0rpHu9msW+q9Mt/0K015jg5y+ZthUDKrv7T8CvRIPvH4BPgbVmVp8cBHN330O0R/xfwPHu/n2mXU4GeqX/xwVwDlA1q/MSHSaTajfRnvEsP0uwLe0auPuvwOZs3kdEJGYUy34XEREpIGoCC4Plk4C1wfJaogGZdNsOAOuD9UlEe5tLuPsaM5tE9CbOisCcHLzvW8DXwCMH2bYaeNvdD9XT7zk4f3pZfZYkoEHqBjMrQ3Q4i4hIgaAecxGRwuMfZlbRzGoCtwLvBu3vALebWW0zKws8Abzr7geC7ZOAm4DJwfrEYP07d0/JwftOAs4HXjjItv8BF5tZFzOLN7NSZtbezFLHfq8nOlY8p7L6LO8DF5nZOWZWAngU/T0nIgWIfmGJiMS2TzLNY/5RFvuOBmYS7eX+DHg9aH8DeJto8P4Z2AvcnO64ScCx/BbMvwPKpFvPkkdNcPctB9m2muiNqPcBG4n2oP+D3/7+eQ7oaWZbzez5HLzdIT+Luy8E+gMjiPaebwUSc/IZRERigbkf7reIIiIiIiKS29RjLiIiIiISAxTMRURERERigIK5iIiIiEgMUDAXEREREYkBCuYiIiIiIjFADxgKnHDCCV6rVq2wyxARERGRQm7mzJmb3L1S5nYF80CtWrVISEgIuwwRERERKeTM7JeDtWsoi4iIiIhIDFAwFxERERGJAXkWzM3sDTPbYGYLDrLtTjNzMzshWDcze97MlpvZPDNrlm7fFDObE7zGpGuvbWbTgmPeNbMSQXvJYH15sL1WXn1GEREREZHckpc95kOBrpkbzawm0BlYla75AqBu8LoeeDndtj3u3iR4dU/XPgh4xt1PBbYCfwva/wZsDdqfCfYTEREREYlpeRbM3X0ysOUgm54B/gl4urYewFseNRWoYGZVD3VuMzOgI/B+0DQMuCTduYYFy+8D5wX7i4iIiIjErHwd
Y25mPYA17j4306bqwOp064lBG0ApM0sws6lmdknQdjywzd0PHGT/tHMF27cH+x+snuuDcyds3LjxKD6ZiIiIiMjRybfpEs2sDHAf0WEsh+Nkd19jZqcAX5vZfKJh+6i5+xBgCECLFi08m91FRERERPJMfvaY1wFqA3PNbCVQA5hlZlWANUDNdPvWCNpw99SfK4CJQFNgM9HhLsUy75/+XMH28sH+MSlpZxLthrZj3a51YZciIiIiIiHKt2Du7vPd/UR3r+XutYgOP2nm7uuAMcBVwewsrYHt7p5kZhXNrCRAMINLW2CRuzvwDdAzOP3VwOhgeUywTrD962D/mDRg8gC+W/UdAyYNCLsUEREREQlRXk6X+A4wBahvZolm9rcsdh8LrACWA68C/YL2BkCCmc0lGsQHuvuiYNvdwB1mtpzoGPLXg/bXgeOD9juAe3LxY+WqVdtW8eqsV4l4hDfnvKlecxEREZEizGK4MzlftWjRwhMSEvL1PbsN78bY5WMBKBZXjOubXc/gboPztQYRERERyV9mNtPdW2Ru15M/Q5K0M4mvV36dtn4gcoBXZr7CnHVzwitKREREREKjYB6SAZMHEPFIhrYUT6Hlqy0ZPH0wKZGUkCoTERERkTAomIdkSuIUklOSf9deslhJbvr8Jtq83ka95yIiIiJFSL7NYy4Zzb5h9kHb3Z13FrzD7V/cToshLbit9W083P5hypYom88VioiIiEh+Uo95jDEz+jTuw+L+i/lr07/y1JSnaPRSIz798dOwSxMRERGRPKRgHqOOK30cQy4ewrfXfEvZEmW5+J2L6TmqJ2t2rMn+YBEREREpcBTMY9w5J53D7Btm80THJ/hs2Wc0GNyAF6a9oJtDRURERAoZBfMCoER8Ce79w70s+PsC2tRswy3jbqH1662ZnXTwceoiIiIiUvAomBcgdY6rw7i+4xhx6QhWb19Ni1dbcMcXd7AreVfYpYmIiIjIUVIwL2DMjCsbX8ni/ou5rtl1PDP1GRoObsiYpWPCLk1EREREjoKCeQFVsXRF/nvRf/n+r99TvlR5eozswaXvXkrijsSwSxMRERGRI6BgXsCdXfNsZl0/i4HnDWTc8nE0GNyA56c9r5tDRURERAoYBfNCoHh8ce4+524W9FtA25ptuXXcrbR6rRUz184MuzQRERERySEF80LklIqn8Hnfzxl52UgSdyTS8rWW3D7udnbu2xl2aSIiIiKSDQXzQsbMuPz0y1ly0xKub3Y9z057loYvNWT0ktFhlyYiIiIiWVAwL6QqlKrAyxe9zA9//YGKpSpyybuXcMnIS1i9fXXYpYmIiIjIQSiYF3JtarZh5vUzGdRpEF/+9CUNBjfg2anPciByIOzSRERERCQdBfMioHh8cf7Z9p8s7LeQc08+l9u/uJ1Wr7UiYW1C2KWJiIiISEDBvAipXbE2n/X5jFE9R7F251pavdaKWz+/lR37doRdmoiIiEiRp2BexJgZvRr1Ykn/JdzY/EZemP4CDQc35KPFH+HuYZcnIiIiUmQpmBdR5UuVZ3C3wUz52xSOL3M8l466lEvevYRV21eFXZqIiIhIkaRgXsS1qtGKhOsSePL8J/lqxVc0HNyQp6c8rZtDRURERPKZgrlQPL44d519F4v6LaJ9rfbc+eWdnPXqWcxYMyPs0kRERESKDAVzSXNyhZP55MpPeL/X+6zftZ5Wr7Xi5rE36+ZQERERkXygYC4ZmBmXNbyMxf0X0/+s/gyeMZgGgxvwwaIPdHOoiIiISB5SMJeDKl+qPC9c+AJTr51KpTKV6PleT7qP7M4v234JuzQRERGRQknBXLLUsnpLEq5P4KnOT/H1z1/T8KWGPPXDU7o5VERERCSXKZhLtorFFeOONnewqN8iOtbuyF3j76LFkBZMS5wWdmkiIiIihYaCueTYyRVOZswVY/ig9wds2r2JNq+34aaxN7F97/awSxMREREp8BTM5bCYGZc2uJRF/Rdxc8ubeWnG
SzQY3ID3F72vm0NFREREjoKCuRyRciXL8dwFzzHt2mlUKVuFXu/14qJ3LmLltpVhlyYiIiJSICmYy1E5q/pZTL9uOk93fppJKyfRcHBDnvz+Sfan7A+7NBEREZECRcFcjlqxuGLc3uZ2FvVfxPl1zuefX/2TFq+2YGri1LBLExERESkwFMwl15xU/iRGXzGajy7/iM27N3P262fT77N+bNu7LezSRERERGKegrnkuktOu4TF/RdzS6tbeGXmKzQY3IBRC0fp5lARERGRLORpMDezN8xsg5ktOMi2O83MzeyEYN3M7HkzW25m88ysWbp9rzazZcHr6nTtzc1sfnDM82ZmQftxZjY+2H+8mVXMy88pv3dsyWN5tuuzTL92OtWOrcbl719OtxHd+Hnrz2GXJiIiIhKT8rrHfCjQNXOjmdUEOgOr0jVfANQNXtcDLwf7Hgc8BLQCWgIPpQvaLwPXpTsu9b3uASa4e11gQrAuIWherTnTrp3Gs12e5dtV39LopUYM+m6Qbg4VERERySRPg7m7Twa2HGTTM8A/gfRjG3oAb3nUVKCCmVUFugDj3X2Lu28FxgNdg23l3H2qR8dIvAVcku5cw4LlYenaJQTF4opxa+tbWdx/MV1P7co9E+6h+ZDmTFk9JezSRERERGJGvo8xN7MewBp3n5tpU3Vgdbr1xKAtq/bEg7QDVHb3pGB5HVA5d6qXo1GjXA0+vPxDPr78Y7bu3crZb5zNjZ/eyNY9W8MuTURERCR0+RrMzawMcB/wYH69Z9CbftC7Ds3sejNLMLOEjRs35ldJRV6P03qwqN8ibm99O6/OepUGgxswcsFI1u5YS7uh7Vi3a13YJYqIiIjku/zuMa8D1AbmmtlKoAYwy8yqAGuAmun2rRG0ZdVe4yDtAOuDoS4EPzccrBh3H+LuLdy9RaVKlY7yo8nhOLbksTzd5WlmXDeDGuVqcOUHV9L69dZ8+8u3DJg0IOzyRERERPJdvgZzd5/v7ie6ey13r0V0+Ekzd18HjAGuCmZnaQ1sD4ajfAF0NrOKwU2fnYEvgm07zKx1MBvLVcDo4K3GAKmzt1ydrl1iTLOqzZh27TQGtB/A6h2rcZz/zvwvj056lMQdidmfQERERKSQyOvpEt8BpgD1zSzRzP6Wxe5jgRXAcuBVoB+Au28BBgAzgtejQRvBPq8Fx/wEfB60DwTON7NlQKdgXWJUfFw8a3etpUR8CQAiHuGhiQ9R85matH2jLc9NfY41O9ZkcxYRERGRgs300JeoFi1aeEJCQthlFElJO5M45flT2Htgb1pbyfiS3NbqNj7/6XPmrZ8HQNuabenVsBc9G/akernqhzqdiIiISEwzs5nu3iJzu578KaEbMHkAEY9kaHOcnck7mXvjXJb0X8KADgPYmbyT2764jRrP1OCcN87h+WnPqyddRERECg31mAfUYx6epq80Zc66Ob9rb1KlCbNvmJ2hbemmpby36D1GLRzF/A3zMYy2J7Wld8PeXNbwMqodWy2fqhYRERE5MofqMVcwDyiYFzxLNi3hvYXv8d6i99JC+jknnUOvhr0U0kVERCRmKZhnQ8G8YEsN6aMWjWLBhgVpIb13o95c1uAyqh5bNewSRURERAAF82wpmBceizcuThvusnDjQgzjDyf/IdqTrpAuIiIiIVMwz4aCeeG0aOOitOEu6UN66pj0KmWrhF2iiIiIFDEK5tlQMC/8UkP6qEWjWLRxEYZx7snnpo1JV0gXERGR/KBgng0F86Jl4YaFacNdFm9anBbSezfqzaUNLlVIFxERkTyjYJ4NBfOiK3NIj7O4aEhvGA3plctWDrtEERERKUQUzLOhYC7uzsKNC9OGuyzZtIQ4i6Pdye3o1bCXQrqIiIjkCgXzbCiYS3qpIX3UwlGMWjiKpZuXpoX01OEuJx5zYthlioiISAGkYJ4NBXM5FHdnwYYFacNdUkN6+1rt03rSFdJFREQkpxTMs6FgLjmRGtJHLRzFqEWj
+HHzj2khPXVMeqVjKoVdpoiIiMQwBfNsKJjL4XJ35m+YnzYmPTWkd6jVgd6NevPH0/6okC4iIiK/o2CeDQVzORruzrz189KGuyzbsox4i6dD7Q70athLIV1ERETSKJhnQ8FccktqSE8d7rJ8y/K0kN67YW/+2OCPnFDmhLDLFBERkZAomGdDwVzygrszd/3ctOEuqSG9Y+2O0Z50hXQREZEiR8E8GwrmktdSQ/qohaN4b9F7GUJ66pj048scn7Z/0s4krvjgCt7t+a6eRCoiIlKIKJhnQ8Fc8pO7M2fdnLQx6T9t/Yl4i+e8U86jd8PeXHLaJTzwzQO8MvMVbmx+I4O7DQ67ZBEREcklCubZUDCXsLg7s9fNThvusmLrCuItHseJeITSxUqz4tYV6jUXEREpJA4VzONycGAZM3vAzF4N1uua2UV5UaRIUWRmNKvajP/r9H8sv3k5M6+fSePKjYl4BIC9B/Zy15d3hVyliIiI5LVsgznwJrAPaBOsrwEey7OKRIowM6Nq2aos2bQkrc1xhs8fzq2f38qu5F0hViciIiJ5KSfBvI67/xvYD+DuuwHL06pEirABkwek9ZaniiOO56c/z2kvnsY7899BQ9BEREQKn5wE82QzKw04gJnVIdqDLiJ5YEriFJJTkjO0RYhQ97i6VC5bmT4f9qHd0HbMXTc3pApFREQkLxTLwT4PAeOAmmY2HGgL/CUvixIpymbfMPuQ21IiKbwx+w3u+/o+mg1pxg3Nb2BAhwEZplkUERGRgilHs7KY2fFAa6JDWKa6+6a8Liy/aVYWKUi27tnKwxMfZvCMwZQvVZ7HOjzG9c2vJz4uPuzSREREJBtHMyvLH4ED7v6Zu38KHDCzS/KgRhHJoYqlK/LcBc8x+4bZnFn5TPqN7UfzIc2Z/MvksEsTERGRI5STMeYPufv21BV330Z0eIuIhKxx5cZMuGoC7/V6j617t9JuaDuu/OBKEnckhl2aiIiIHKacBPOD7ZOTsekikg/MjJ4Ne7K4/2IeavcQHy/5mPov1ueJb59g74G9YZcnIiIiOZSTYJ5gZk+bWZ3g9TQwM68LE5HDU6Z4GR5u/zCL+y+m66lduf/r+2n0UiPGLB2j6RVFREQKgJwE85uBZODd4LUP6J+XRYnIkatVoRYf9P6A8X8eT8n4kvQY2YMLR1zI0k1Lwy5NREREspCjWVmKAs3KIoXR/pT9DJ4xmIcmPsTu/bu5rdVtPNDuAcqVLBd2aSIiIkXW0czKUs/MhpjZl2b2deorb8oUkdxUPL44t7W+jWU3L+PqM6/mqSlPUe+FegybM+x3TxcVERGRcGXbY25mc4H/Eh1XnpLa7u6Fapy5esylKJixZgY3f34z09ZMo3WN1rxwwQu0qPa7f7CLiIhIHjriHnOic5i/7O7T3X1m6isPahSRPHZW9bP44W8/MLTHUH7e+jMtX23JtWOuZcOvG8IuTUREpMjLSTD/xMz6mVlVMzsu9ZXdQWb2hpltMLMF6doGmNk8M5sTDI2pFrRXNLOPgm3Tzez0dMesNLP5wTEJ6dqPM7PxZrYs+FkxaDcze97Mlgfna3ZYV0SkkIuzOK5ucjVLb1rKHW3uYNjcYdR7oR7PTX2O/Sn7wy5PRESkyMpJML8a+AfwA9HhLDOBnIz5GAp0zdT2pLuf4e5NgE+BB4P2+4A57n4GcBXwXKbjOrh7k0xd/vcAE9y9LjAhWAe4AKgbvK4HXs5BrSJFTvlS5flP5/8w/+/zaVWjFbd9cRtNX2nKhBUTwi5NRESkSMo2mLt77YO8TsnBcZOBLZnadqRbPQZIHeDeEPg62GcJUMvMKmfzFj2AYcHyMOCSdO1vedRUoIKZVc2uXpGi6rQTTmNc33GMvmI0ew7sodPbneg5qicrt60MuzQREZEiJSc95pjZ6WbW28yuSn0d6Rua2eNmthroy2895nOBS4PtLYGTgRrBNge+NLOZZnZ9ulNVdvekYHkdkBrkqwOr0+2X
GLSJyCGYGd3rd2dhv4U81uExPl/+OQ0GN+DhiQ+ze//usMsTEREpEnIyXeJDwAvBqwPwb6D7kb6hu9/v7jWB4cBNQfNAoj3bc4g+0Gg2v80Ac467NyM6RKW/mZ17kHM6v/W+55iZXW9mCWaWsHHjxsP/MCKFTKlipbj/3PtZ0n8JPer34JFJj9BgcAM+WPSBnh4qIiKSx3LSY94TOA9Y5+7XAGcC5XPhvYcDl0F0iIu7XxOMPb8KqASsCLatCX5uAD4CWgbHr08dohL8TJ1WYg1QM9371Ajafsfdh7h7C3dvUalSpVz4SCKFQ83yNRnZcyQTr55I+ZLl6fleTzq93YmFGxaGXZqIiEihlZNgvsfdI8ABMytHNADXzOaYgzKzuulWewBLgvYKZlYiaL8WmOzuO8zsGDM7NtjnGKAzkDrLyxiiN6YS/Bydrv2qYHaW1sD2dENeROQwtKvVjlk3zOLFC15kdtJszvzvmdz6+a1s27st7NJEREQKnZwE8wQzqwC8SnRGllnAlOwOMrN3gv3qm1mimf0NGGhmC8xsHtGQfWuwewNggZktJTpkJbW9MvBd8JCj6cBn7j4u2DYQON/MlgGdgnWAsUR725cHNffLwWcUkUMoFleM/i378+PNP3Jds+t4YfoL1H2hLq/Neo2USEr2JxAREZEcyfbJnxl2NqsFlHP3eXlWUUj05E+RnJmdNJtbxt3Cd6u+o3nV5rxwwQu0qdkm7LJEREQKjCN+8qeZpU1q7O4r3X1e+jYRKVqaVm3K5L9MZsSlI0jalcTZb5zNVR9dRdJOjRgTERE5GocM5mZWKnjC5wnBkzlTn/pZC00/KFKkmRlXNr6SpTct5d5z7uXdhe9S78V6PPn9kySnJIddnoiISIGUVY/5DUTHlJ/Gb0/8nEn0JssX8740EYl1ZUuU5YnznmBhv4W0r9Wef371Txq/3Jhxy8dlf7CIiIhkcMhg7u7PuXtt4C53PyXdUz/PdHcFcxFJc+pxp/LJlZ/wWZ/PcHcuGH4B3d/pzvIty8MuTUREpMDIyaws69JNWfgvM/vQzJrlcV0iUgBdWPdCFvRbwKBOg/hm5Tc0eqkR90+4n13Ju8IuTUREJOblJJg/4O47zewcotMSvg68nLdliUhBVSK+BP9s+0+W3rSUyxtdzhPfPcFpL57GO/Pf0dNDRUREspCTYJ46UXE3YIi7fwaUyGJ/ERGqHVuNt/74Ft9d8x2Vy1amz4d9OHfoucxZNyfs0kRERGJSToL5GjN7BbgcGGtmJXN4nIgIbU9qy/RrpzPkoiEs2bSE5kOa0++zfmzevTns0kRERGJKTgJ2b+ALoIu7bwOOA/6Rl0WJSOESHxfPdc2v48ebfuSms25iyMwh1H2hLi/NeElPDxUREQlkNY95uWCxFDAR2BzMa74P0CMyReSwVSxdkecueI7ZN8ymSZUm9B/bn+ZDmjP5l8lhlyYiIhK6rHrMRwQ/ZxIN4unnMlcwF5Ej1rhyYyZcNYH3er3H1r1baTe0HVd+cCWrt68OuzQREZHQmGZJiGrRooUnJOjfGyL5bff+3Qz6bhCDvh9EfFw8951zH3eefSelipUKuzQREZE8YWYz3b1F5vYsx5ibWTEzu9jM/hG8uplZsbwrU0SKmjLFy/BIh0dY3H8xXU/tyr+++ReNXmrEmKVjNL2iiIgUKVmNMa8OLATuBKoB1YF/AgvNrFr+lCciRUXtirX5oPcHfPmnLykZX5IeI3twwfALWLJpCUk7k2g3tB3rdq0Lu0wREZE8c8ihLGY2FJjj7s9mar8FaO7uV+d5dflIQ1lEYsf+lP0MnjGYhyY+xO79u2lUqRHz18/nxhY3Mrjb4LDLExEROSpHMpSldeZQDuDuzwOtc7E2EZEMiscX57bWt/HjTT/Ss0FP5q6fS4QIQ2YO4cufvtQQFxERKZSyCuZ7sti2O7cLERHJrHLZylQsXZHiccUBOOAH6PK/LjR8qSGPTX6MFVtXhFyh
iIhI7snqRs7yZnbpQdoNKHeQdhGRXJW0M4k357zJ/sj+tLbiccUpX7I8D3zzAA988wBtarShT+M+XN7ociodUynEakVERI5OVj3mk4CLD/K6CNDTQEQkzw2YPICIRzK0mRnNqzZn5a0rGXjeQHYl7+Lmz2+m6lNVuXD4hQyfN5xdybtCqlhEROTIaR7zgG7+FIk9TV9pypx1c37X3qRKE2bfMDttff76+QyfP5wR80ewesdqyhQvQ4/6PejbuC+d63SmeHzxfKxaREQka4e6+VPBPKBgLlLwRTzC96u+Z/j84by36D227NnCCWVOoHfD3vQ9oy9tarTBzMIuU0REijgF82womIsULskpyYxbPo7h84czZukY9h7YS60Ktehzeh/6ntGXhpUahl2iiIgUUQrm2VAwFym8du7byUdLPmL4/OF8teIrIh6hSZUm9G3clytOv4Ia5WqEXaKIiBQhRxzMzawM0ad/nuTu15lZXaC+u3+aN6WGQ8FcpGhYt2sd7y54lxELRjB9zXQMo12tdvRt3JeeDXtSoVSFsEsUEZFC7miC+bvATOAqdz89COo/uHuTPKk0JArmIkXPss3LGDF/BMPnD2fZlmWUiC9Bt7rd6Nu4L93qdaNUsVJhlygiIoXQ0QTzBHdvYWaz3b1p0DbX3c/Mo1pDoWAuUnS5OwlrExgxfwQjF45k3a51lCtZjssaXEbfxn1pX6s98XHxYZcpIiKFxKGCeVYPGEqVbGalAQ9OVAfYl8v1iYiExsw4q/pZnFX9LJ7s/CTf/PwNw+cP5/1F7/PmnDepWrYqV5x+BX0b96VZ1Waa2UVERPJETnrMzwf+BTQEvgTaAn9x94l5Xl0+Uo+5iGS2Z/8ePvnxE0bMH8HYZWPZH9lP/ePr07dxX/o07kOd4+qEXaKIiBRARzSUxczigJ7ABKA1YMBUd9+UV4WGRcFcRLKyZc8W3l/0PsPnD2fyL9GHH7eu0Zo+p/fh8tMv58RjTgy5QhERKSiOeox5nlUWIxTMRSSnVm1fxcgFIxk+fzjz1s8j3uI5v8759G3cl0tOu4SyJcqGXaKIiMSwownmA4FNwLvAr6nt7r4lt4sMk4K5iByJBRsWMHzecEYsGMGq7asoXaw0PU7rQd/GfelSpwvF44uHXaKIiMSYownmPx+k2d39lNwqLhYomIvI0Yh4hO9Xfc+I+SMYtWgUW/Zs4fjSx9O7UW/6Nu5Lm5ptiLO4sMsUEZEYoCd/ZkPBXERyS3JKMl8s/4Lh84czZukY9hzYw8nlT6ZP4z70bdyXRic2CrtEEREJ0VEFczM7neisLGlP23D3t3K1wpApmItIXti5bycfL/mY4fOHM37FeCIe4czKZ9K3cV+uOP0KapavGXaJIiKSz45mKMtDQHuiwXwscAHwnbv3zIM6Q6NgLiJ5bf2u9by78F2Gzx/O9DXTMYxzTz6Xvo370rNhTyqWrhh2iSIikg8OFcxzMuCxJ3AesM7drwHOBMrn4A3fMLMNZrYgXdsAM5tnZnPM7Eszqxa0VzSzj4Jt04Me+tRjuprZUjNbbmb3pGuvbWbTgvZ3zaxE0F4yWF8ebK+Vg88oIpLnKpetzC2tbmHatdNYdvMyHm7/MEm7krj+0+up8lQV/vjuH3lv4Xvs2b8n7FJFRCQEOQnme9w9Ahwws3LABiAn370OBbpmanvS3c9w9ybAp8CDQft9wBx3PwO4CngOwMzigcFEe+kbAleaWcPgmEHAM+5+KrAV+FvQ/jdga9D+TLCfiEhMOfW4U3mw3YMs6b+EGdfNoP9Z/ZmaOJXe7/emylNVuGb0NXy14itSIilhlyoiIvkkJ8E8wcwqAK8CM4FZwJTsDnL3ycCWTG070q0eA6SOo2kIfB3sswSoZWaVgZbAcndf4e7JwEigh0Wfh90ReD84fhhwSbDcI1gn2H6e6fnZIhKjzIwW1VrwdJenSbw9kfF/Hs+lDS7lg0UfcP7b51PzmZrc8cUdJKxNIHXo
YdLOJNoNbce6XetCrl5ERHJTsex2cPd+weJ/zWwcUM7d5x3pG5rZ40R7xbcDHYLmucClwLdm1hI4GagBVAdWpzs8EWgFHA9sc/cD6dqrB8tpx7j7ATPbHuxf6J5WKiKFS3xcPJ1O6USnUzrx0oUv8emPnzJiwQhenP4iz0x9hvrH16dP4z4s3bSU71Z9x4BJAxjcbXDYZYuISC7JNpib2bkHawt6xA+bu98P3G9m9wI3AQ8BA4HnzGwOMB+YDeT597dmdj1wPcBJJ52U128nIpJjpYuXplejXvRq1Iute7by/qL3GT5/OA9NfChtn1dmvkK94+vR9dSu1Du+HvpyUESkYMvJrCyfpFstRXR4yUx375jtyaM3Xn7q7qcfZNtJwNjM24JhJz8DZwCNgIfdvUuw7d5gt4HARqBK0CveJnU/M/siWJ5iZsWAdUAlz+aDalYWESkI/vzhn3lnwTukeMa+i+NKH0er6q1oU6MNbWq2oWX1lpQrWS6kKkVEJCuHmpUlJ0NZLs50oprAs0dYRF13Xxas9gCWBO0VgN3BOPJrgcnuvsPMZgB1zaw2sAa4Aujj7m5m3xCdMWYkcDUwOjjvmGB9SrD96+xCuYhIQZC0M4n3F7+fIZSXjC/JEx2fYPGmxUxJnMK45eNwHMNodGIjWldvTZuabWhdozWnnXCanj4qIhLDsg3mB5EINMhuJzN7h+j85yeYWSLRISsXmll9IAL8AtwY7N4AGGZmDiwkmGEl6A2/CfgCiAfecPeFwTF3AyPN7DGiQ19eD9pfB942s+VEbz694gg+o4hIzBkweQARj2Roc5yftv7Eq91fBWD73u1MXzOdKYlTmJo4lQ8Wf8Brs18DoHzJ8rSqEfSq14j2qmvudBGR2JGToSwv8NvsKXFAE2Clu/8pb0vLXxrKIiKxrukrTZmzbs7v2ptUacLsG2Yf9JiIR1i2eRlTEqcwZfUUpq6ZyoINC9ICfoMTGtC6Rmva1Ij2qjes1JD4uPi8/BgiIkXe0Tz58+p0qweIhvLvc7m+0CmYi0hRsXPfTmasnZEW1KesnsLmPZsBOLbEsbSq0SptCEyr6q04vszxIVcsIlK4HHEwLyoUzEWkqHKPDoeZsnpK2hCYeevnpY1lr3d8vQy96qefeDrF4o5kJKSIiMDR9ZjP57ehLBk2AR48rbPAUzAXEfnNr8m/krA2IS2oT0mcwoZfNwBwTPFjaFm9ZYawXumYSiFXLCJScBxNMP93sPh28LNv8PNlAHf/JbeKDJOCuYjIobk7P2/7ORrSgyEwc9bN4UAk+py3OhXrpAX1NjXb0PjExhSPLx5y1SIiselogvlsd2+aqW2WuzfL5RpDpWAuInJ4du/fzaykWWlDYKYkTmHdrnUAlC5WmrOqn5VhusYqZauEXLGISGw4mmA+B+ifesOnmZ0NvOTuTfKgztAomIuIHB13Z9X2VWlDX6YmTmVW0iz2R/YDUKtCrd961Wu04cwqZ1IivkTIVYuI5L+jCebNgTeA8kTHlW8F/urus/Ki0LAomIuI5L69B/YyO2l2Wo/61MSpJO5IBKBUsVI0r9o8w1j16uWqh1yxiEjeO+pZWcysPIC7b8/l2mKCgrmISP5I3JGYYaz6zLUz2ZeyD4Ca5WpmGKvetEpTShYrGXLFIiK567CDuZldDMxLvbnTzB4ELiP6xM5b3f3nPKw33ymYi4iEY9+BfcxZNyfDEJhftkfnFSgRX4JmVZtlGKtes1xNzCzt+KSdSVzxwRW82/NdjWMXkQLhSIL5PKC1u+82s4uAp4ErgaZAL3fvkpcF5zcFcxGR2JG0MyktqE9JnELC2gT2HtgLQLVjq6UNfWlTow3D5g7j9dmvc2PzGxncbXDIlYuIZO9Igvlcdz8zWH4DWOrug4J1zcoiIiL5Zn/Kfuaun5uhV33F1hUZ9ildrDQrbl2hXnMRiXmHCuZxWR9jZc0sDjgPmJBuW6ncLlBERORQiscXp0W1FtzU8iaG
Xzqcn275iXV3rqPrqV0xosNaklOSGTBpQMiViogcuayC+bPAHCABWOzuCQBm1hRIyvPKREREshDxCBNXTsSDh1OneApDZg1Jm0tdRKSgOWQwd/c3gHbA34AL021aB1yTx3WJiIhkacDkAUQ8kqHtQOQAPd7pEVJFIiJHJ6sec9x9jbvPdv/tN5+7J7n7qrwvTURE5NCmJE4hOSX5d+3T107n0UmPhlCRiMjRKRZ2ASIiIkdi9g2zf9eWEknhr2P+ykMTHyLiER5u/3D+FyYicoQOGczNrHZhm6tcREQKt/i4eN7o/gZxFscjkx7B3Xm4/cMZ5j0XEYlVWfWYvw80N7MJ7n5efhUkIiJyNOLj4nm9++vEEcejkx8l4hEe7fCowrmIxLysgnmcmd0H1DOzOzJvdPen864sERGRIxdncbza/VXiLI7Hvn2MiEd4rONjCuciEtOyCuZXAJcE+xybL9WIiIjkkjiL45WLXyHO4njiuyeIeIQnzntC4VxEYtYhg7m7LwUGmdk8d/88H2sSERHJFXEWx8sXvUycxTHw+4GkeAqDOg1SOBeRmJSTWVl+MLOngXOD9UnAo+6+Pe/KEhERyR1xFsdL3V4izuJ48ocniXiEJ89/UuFcRGJOToL5G8ACoHew/mfgTeDSvCpKREQkN5kZL174InEWx1NTniLiEZ7q/JTCuYjElJwE8zruflm69UfMbE4e1SMiIpInzIznL3ieOIvjmanPEPEIz3R5RuFcRGJGToL5HjM7x92/AzCztsCevC1LREQk95kZz3Z9ljiL49lpzxLxCM91fU7hXERiQk6C+Y3AW2ZWPljfClyddyWJiIjkHTPj6S5PE2dxPD31aSIe4YULXlA4F5HQZRvM3X0ucKaZlQvWd+R5VSIiInnIzPhP5/8QZ3H8Z8p/iHgkbQy6iEhYctJjDiiQi4hI4WJm/Pv8fxNncfz7h38T8Uja7C0iImHIcTAXEREpbMyMgZ0Gps1zHvEI/73ovwrnIhIKBXMRESnSzIwnznsiwxNCh1w8ROFcRPJdtsHczOKBbkCt9Pu7+9N5V5aIiEj+MTMe6/gYcRbHY98+RsQjvNb9NYVzEclXOekx/wTYC8wHInlbjoiISDjMjEc7PEp8XDyPTHoEx3nt4teIj4sPuzQRKSJyEsxruPsZeV6JiIhIyMyMh9s/jGE8POlhIh7hje5vKJyLSL7ISTD/3Mw6u/uXeV6NiIhIDHio/UPEWRwPTnyQiEcY2mOowrmI5LmcBPOpwEdmFgfsBwxwdy+Xp5WJiIiE6IF2DxBncfzrm38R8QjDLhlGsTjNmSAieScnv2GeBtoA893d87geERGRmHH/ufcTZ3Hc9/V9RDzC2398W+FcRPJMTm43Xw0sONxQbmZvmNkGM1uQrm2Amc0zszlm9qWZVQvay5vZJ2Y218wWmtk16Y5JCfafY2Zj0rXXNrNpZrbczN41sxJBe8lgfXmwvdbh1C0iIpLevX+4l4HnDWTkgpH0/bAvByIHwi5JRAqpnATzFcBEM7vXzO5IfeXguKFA10xtT7r7Ge7eBPgUeDBo7w8scvczgfbAU6lBG9jj7k2CV/d05xoEPOPupwJbgb8F7X8DtgbtzwT7iYiIHLG7z7mbJ89/klELR9Hngz7sT9kfdkkiUgjl5Pu4n4NXieCVI+4+OXNvtbvvSLd6DJDaC+/AsWZmQFlgC3DILolgv45An6BpGPAw8DLQI1gGeB940cxMw3BERORo3HX2XcRZHHd+eScRj/DOZe9QPL542GWJSCGSbTB390dy8w3N7HHgKmA70CFofhEYA6wFjgUud/fUOdNLmVkC0aA+0N0/Bo4Htrl7anhPBKoHy9WJDr/B3Q+Y2fZg/00HqeV64HqAk046KRc/pYiIFEZ3tLmDOIvj9i9u5/L3L2dkz5GUiM9xn5WISJayHcpiZt+Y2deZX0f6hu5+v7vXBIYDNwXNXYA5QDWgCdFe7tRZX0529xZEe8ef
NbM6R/reB6lliLu3cPcWlSpVyq3TiohIIXZb69t4rutzfLTkI3q/15vklOSwSxKRQiInY8zvAv4RvB4gGqATcuG9hwOXBcvXAB961HKiQ2dOA3D3NcHPFcBEoCmwGahgZqk9/jWANcHyGqAmQLC9fLC/iIhIrril1S28cMELjF46mp6jerLvwL6wSxKRQiDbYO7uM9O9vnf3O4jeoHnYzKxuutUewJJgeRVwXrBPZaA+sMLMKppZyaD9BKAt0ZtEHfgG6BkcfzUwOlgeE6wTbP9a48tFRCS33dTyJl684EU++fETer6ncC4iRy/bMeZmdly61TigOdFe6OyOe4dogD/BzBKBh4ALzaw+EAF+AW4Mdh8ADDWz+UQfYHS3u28ys7OBV8wsErz3QHdfFBxzNzDSzB4DZgOvB+2vA2+b2XKiN5FekV2tIiIiR6J/y/7EWRz9xvbj0lGX8kHvDyhVrFTYZYlIAWXZdSab2c9EZ00xojdg/gw86u7f5X15+adFixaekJAbI3RERKSoGTJzCDd8egMXnHoBH17+ocK5iGTJzGYG91BmkJNZWWrnTUkiIiKFw/XNryfO4rjuk+u4ZOQlfHzFxwrnInLYDjnG3MzOMrMq6davMrPRZvZ8puEtIiIiRd61za7l9e6v8+VPX9JjZA/27N8TdkkiUsBkdfPnK0AygJmdCwwE3iI6//iQvC9NRESkYPlr07/yevfXGf/TeLqP7M7u/bvDLklECpCsgnm8u28Jli8Hhrj7B+7+AHBq3pcmIiJS8FzT9Bre7PEmE1ZM4OJ3LlY4F5EcyzKYp5sn/Dwg/UOFsh2bLiIiUlRd3eRqhl0yjG9+/oaLRlzEr8m/hl2SiBQAWQXzd4BJZjYa2AN8C2BmpxIdziIiIiKH8Ocz/8zbf3ybSb9MotuIbgrnIpKtQ/Z8u/vjZjYBqAp8me4hPXHAzflRnIiISEHW94y+xFkcf/roT1w44kI+6/MZZUuUDbssEYlRWQ5JcfepB2n7Me/KERERKVyubHwlcRZH3w/7csHwCxjbZyzHljw27LJEJAZlNZRFREREcsHlp1/OiMtGMGX1FC4YfgE79+0MuyQRiUEK5iIiIvmgd6PejOw5kqmJU+nyvy7s2Lcj7JJEJMYomIuIiOSTng178m7Pd5mxdgZd/teF7Xs1l4KI/EbBXEREJB9d1vAyRvUcRcLaBDr/rzPb9m4LuyQRiREK5iIiIvnsjw3+yPu93md20mw6v61wLiJRCuYiIiIh6HFaDz7o/QFz1s3h/LfPZ+uerWGXJCIhUzAXEREJycX1L+bDyz9k3vp5dHq7E1v2bAm7JBEJkYK5iIhIiC6qdxEfXf4RCzYsoNNbndi8e3PYJYlISBTMRUREQnZh3QsZfcVoFm1cxHlvncem3ZvCLklEQqBgLiIiEgO6ntqV0VeMZsmmJQrnIkWUgrmIiEiM6HJqF8ZcOYYfN/9Ix2Ed2fjrxrBLEpF8pGAuIiISQzrX6cwnV37C8i3L6fhWRzb8uiHskkQknyiYi4iIxJhOp3Ti0z6f8tOWn+gwrAPrd60PuyQRyQcK5iIiIjGoY+2OjO07lpXbVtJhWAfW7VoXdkkikscUzEVERGJU+1rtGdtnLL9s/4UOwzqQtDMp7JJEJA8pmIuIiMSwdrXa8Xnfz1m9fTXth7Vn7c61YZckInlEwVxERCTGnXvyuYz70zjW7lxL+6HtWbNjTdgliUgeUDAXEREpAM456RzG9R3Hul3raD+sPYk7EsMuSURymYK5iIhIAdH2pLZ88acvWL9rPe2Htmf19tVhlyQiuUjBXEREpABpU7MNX/75Szbu3kj7Ye1ZtX1V2CWJSC5RMBcRESlgWtdozfg/j2fz7s20H9qeX7b9EnZJIpILFMxFREQKoJbVWzL+z+PZsmcL7Ye1Z+W2lWGXJCJHScFcRESkgDqr+ll8ddVXbNu7jfZD2zMtcRrthrbTw4hECigFcxERkQKsRbUWTLhq
Ajv27aDT25349pdvGTBpQNhlicgRUDAXEREp4JpVbcbIniPZlbwLx3lt1mt6EJFIAaRgLiIiUgh8vORjiscVByA5ksyZ/z2Tr1Z8FXJVInI4FMxFREQKuKSdSbw55032R/antW3evZnz3z6fzm93ZnbS7BCrE5GcytNgbmZvmNkGM1uQrm2Amc0zszlm9qWZVQvay5vZJ2Y218wWmtk16Y652syWBa+r07U3N7P5ZrbczJ43MwvajzOz8cH+482sYl5+ThERkTANmDyAiEcytBWPL07bGm2ZmTSTZkOa0ffDvvy89eeQKhSRnMjrHvOhQNdMbU+6+xnu3gT4FHgwaO8PLHL3M4H2wFNmVsLMjgMeAloBLYGH0gXtl4HrgLrBK/W97gEmuHtdYEKwLiIiUihNSZxCckpyhrbklGR+PfArK25Zwb3n3MtHiz+i/ov1uW3cbWz8dWNIlYpIVorl5cndfbKZ1crUtiPd6jGAp24Cjg16vcsCW4ADQBdgvLtvATCz8UBXM5sIlHP3qUH7W8AlwOdAD6LhHmAYMBG4O1c/nIiISIyYfUPWQ1WeOO8J+p/Vn4cnPswL01/gjdlvcHfbu7mt9W0cU+KYfKpSRLITyhhzM3vczFYDffmtx/xFoAGwFpgP3OruEaA6sDrd4YlBW/VgOXM7QGV3TwqW1wGV8+JziIiIFBTVy1Xn1e6vsuDvC+hYuyP/+uZf1H2hLkNmDuFA5EDY5YkIIQVzd7/f3WsCw4GbguYuwBygGtAEeNHMyuXCezm/9cpnYGbXm1mCmSVs3Kiv9UREpPBrUKkBH1/xMd9d8x21K9bmhk9v4PSXTuejxR8R/StTRMIS9qwsw4HLguVrgA89ajnwM3AasAaome6YGkHbmmA5czvAejOrChD83HCwN3f3Ie7ewt1bVKpUKZc+koiISOxre1JbvrvmOz6+/GPMjEtHXcrZb5zNt798G3ZpIkVWvgdzM6ubbrUHsCRYXgWcF+xTGagPrAC+ADqbWcXgps/OwBfBUJUdZtY6GJd+FTA6ONcYIHX2lqvTtYuIiEjAzOhxWg/m/30+r178Kqu2r+LcoefS/Z3uLNywMOzyRIocy8uvrczsHaI3YZ4ArCc6u8qFREN3BPgFuNHd1wTTJg4FqgIGDHT3/wXn+StwX3Dax939zaC9RXBMaaI3fd7s7m5mxwOjgJOC9+idevPoobRo0cITEhJy54OLiIgUQLv37+a5qc8x8PuB7ErexV/O/AuPdHiEGuVqZH+wiOSYmc109xa/a9d4sigFcxERkajNuzfz+LePM3jGYOIsjlta3sI959xDxdJ6LIhIbjhUMA97jLmIiIjEmOPLHM/TXZ5m6U1L6dWwF0/+8CR1nq/Df374D3sP7A27PJFCS8FcREREDqpWhVq89ce3mH3DbFrVaMU/xv+Dei/UY9icYaREUsIuT6TQUTAXERGRLJ1Z5Uw+7/s5E66awInHnMhfRv+Fpq80ZeyysZpisQBJ2plEu6HtWLdrXdilyCEomIuIiEiOdKzdkenXTefdnu+ye/9uuo3oRodhHZi+ZnrYpUkODJg8gO9WfceASQPCLkUOQcFcREREcizO4ujdqDeL+i/ixQteZNHGRbR6rRW93uvFss3Lwi5PMtmfsp8JKyZw7ehr+W/Cf4l4hCGzhvDewvfYsW9H2OVJJpqVJaBZWURERA7fzn07eWrKU/znh/+wL2Uf1zW7jgfbPUiVslXCLq3I2rpnK58v/5xPfvyEz5d9zvZ924m3eCIewdM9DN0wGlZqSKvqrWhdozWtarSiUaVGxMfFh1h90aDpErOhYC4iInLk1u9az6OTHmXIrCGUjC/JnW3u5K6z7+LYkseGXVqR8NOWn/jkx08Ys3QMk3+ZTIqncOIxJ3JxvYs556Rz+Ptnf88wo06J+BLc3PJmFm1cxLQ109iyJ/q4l2OKH8NZ1c+iVfVW0VeNVlQ7tlpYH6vQUjDPhoK5iIjI0Vu2eRn3
f30/7y16j0plKvFguwe5vvn1lIgvEXZphUpKJIXpa6YzZukYxvw4hkUbFwFw+omnc3G9i+levzstq7ckzuLo91k/Xp/9OskpyWnHl4gvwbVNr2Vwt8G4O8u3LGfammlMS5zGtDXTmLNuDvsj+wGoWa4mrWq0SutZb1a1GWWKlwnlcxcWCubZUDAXERHJPdPXTOfur+5m4sqJ1KlYh8c7Pk6vRr2IM93edqR+Tf6V8SvG88nST/h02ads+HUD8RZPu1rt6F6vOxfXv5hTKp7yu+OavtKUOevm/K69SZUmzL5h9kHfa++BvcxOmh0N62umMTVxKiu3rQQg3uI5o/IZGYbA1Du+nv7bHgYF82womIuIiOQud2fc8nHc/dXdzN8wn+ZVmzOo0yDOO+W8sEsrMNbuXMunP37KmKVj+GrFV+xL2Uf5kuW5oO4FdK/Xna6nds23J7Ku37We6WumpwX1GWtnpN1AWr5keVpWbxkN6sEQmBPKnJAvdRVECubZUDAXERHJGymRFIbPH84D3zzAqu2r6FKnCwM7DaRJlSZhlxZz3J156+cxZukYPvnxE2asnQFA7Qq16V6/O93rd+cPJ/2B4vHFQ64UIh5hyaYlTEuMBvVpa6Yxf8N8Ih4B4JSKp/wW1Ku3okmVJpQsVjLkqmODgnk2FMxFRETy1t4Dexk8fTCPf/s42/Zuo+8ZfRnQYQC1KtQKu7RQJackM2nlpLTx4qu2r8IwWtVolTZevFGlRphZ2KVm69fkX5mZNDMtqE9LnMaanWuA6Lj2plWapvWot67RmtoVaheIz5XbFMyzoWAuIiKSP7bt3cbA7wby3LTniHiEfi36cf+59xepoQ9b9mxh7LKxjFk6hnHLx7EzeSeli5Xm/Drn071ed7rV61ZoppxM3JGYdlPptDXTSFibwO79uwE4ocwJaT3qrWu05qzqZ1GhVIVwC84HCubZUDAXERHJX4k7Enl44sO8OedNypYoy91t7+a21rcV2hk/lm1eljZE5btV35HiKVQpWyWtV/y82udRunjpsMvMcwciB1iwYUF0CMyaqUxLnMbiTYvTtp92wmkZhsA0rtyYYnHFQqw49ymYZ0PBXEREJBwLNyzkvq/vY8zSMVQtW5VH2j/CNU2vKfBhLCWSwtTEqWlDVJZsWgLAGZXPSAvjLaq10GwmwPa925mxdkaGITAbd28EoHSx0rSo1iLDEJga5WqEXPHRUTDPhoK5iIhIuL795Vvu/upupiRO4bQTTuP/zvs/etTvUaDGIO9K3sWXP33JmKVj+GzZZ2zavYliccVoX6t92pSGRX1MfU64Oyu3rfwtqK+ZxqykWWlzsVc7tlqGITDNqzWnbImyIVedcwrm2VAwFxERCZ+7M3rpaO756h6Wbl7K2TXP5t+d/k3bk9qGXdohJe5ITJvScMLPE0hOSaZCqQp0q9uNi+tdTNdTu1K+VPmwyyzw9h3Yx9z1czOMV1++ZTkAcRbH6SeeTuvqrdMehtSgUoNDfhuRtDOJKz64gnd7vhvKWH4F82womIuIiMSOA5EDvDn7TR6a+BBJu5LoXr87/3fe/9GwUsOwS8PdmbNuTtoQlVlJswCoU7FO2pSGbWu2jYkpDQu7Tbs3RedWTxfWt+3dBsCxJY6lZfWWaUNgWlVvReWylQHo91k/Xpn5Cjc2v5HB3Qbne90K5tlQMBcREYk9vyb/ynPTnmPQ94PYlbyLa5pcwyPtH6F6uer5Wse+A/uYuHJiWhhP3JGIYbSp2SZtiEqDExoUqGE3hVHEIyzbvCztIUjT1kxj3vp5HIgcAKBWhVqcceIZjF0+lgORA5QuVpoVt67I915zBfNsKJiLiIjErk27N/H45McZPGMw8XHx3NbqNu4+5+48nVpv0+5NaVMafvHTF+xK3kWZ4mXoXKdz2pSGJx5zYp69v+SOPfv3MCtpVlpQ/3z55+xK3gVE51a/tum1+d5rrmCeDQVzERGR2Pfz1p954JsHGD5/OMeV
Po77/3A//c7qR6lipXLl/Es3LU3rFf9h9Q9EPEK1Y6ulzaLSsXbHXHsvyX9JO5M45flT2Htgb1pbGL3mCubZUDAXEREpOGYnzebeCffyxU9fcFL5k3isw2P0adyH+Lj4wzrPgcgBpqyekhbGf9z8IwBNqjRJG6LSrGozTWlYSPT7rB+vz349bXYXCKfXXME8GwrmIiIiBc+EFRP451f/ZFbSLM6ofAaDOg2iS50umNkhZ97YuW8nX/z0RdqUhlv2bKF4XHE61O6QFsZPKn9SiJ9K8krTV5oyZ92c37U3qdKE2TfMzrc6FMyzoWAuIiJSMEU8wqiFo7j/6/tZsXUFHWp1YFCnQbw55820mTfuOecePvnxE8YsHcM3K78hOSWZ40ofR7e63ehevzud63SmXMlyYX8UKSIUzLOhYC4iIlKwJack80rCKzw6+VE27d5EnMUR8QiG4UTzTt3j6qZNaXh2zbML/NNFpWBSMM+GgrmIiEjhsGPfDjoO68jMpJkAGEbr6q1585I3qX9C/ZCrEzl0MNedDCIiIlKo/Jr8Kws3Lkxbd5w56+fo6ZsS8xTMRUREpFAZMHkAEY9kaEvxFAZMGhBSRSI5o2AuIiIihcqUxCkZpsOD6PjzHxJ/CKkikZzRHQ8iIiJSqOTntHciuUk95iIiIiIiMUDBXEREREQkBiiYi4iIiIjEAAVzEREREZEYoGAuIiIiIhIDFMxFRERERGKAgrmIiIiISAxQMBcRERERiQHm7mHXEBPMbCPwS0hvfwKwKaT3jjW6FhnpevxG1yIjXY+MdD1+o2uRka5HRroevwnzWpzs7pUyNyqYxwAzS3D3FmHXEQt0LTLS9fiNrkVGuh4Z6Xr8RtciI12PjHQ9fhOL10JDWUREREREYoCCuYiIiIhIDFAwjw1Dwi4ghuhaZKTr8Rtdi4x0PTLS9fiNrkVGuh4Z6Xr8JuauhcaYi4iIiIjEAPWYi4iIiIjEAAXzkJhZTTP7xswWmdlCM7s17JrCZmbxZjbbzD4Nu5awmVkFM3vfzJaY2WIzaxN2TWEys9uD/08WmNk7ZlYq7Jryk5m9YWYbzGxBurbjzGy8mS0LflYMs8b8cohr8WTw/8o8M/vIzCqEWGK+Otj1SLftTjNzMzshjNrCcKjrYWY3B39GFprZv8OqLz8d4v+VJmY21czmmFmCmbUMs8b8dKjcFWu/SxXMw3MAuNPdGwKtgf5m1jDkmsJ2K7A47CJixHPAOHc/DTiTInxdzKw6cAvQwt1PB+KBK8KtKt8NBbpmarsHmODudYEJwXpRMJTfX4vxwOnufgbwI3BvfhcVoqH8/npgZjWBzsCq/C4oZEPJdD3MrAPQAzjT3RsB/wmhrjAM5fd/Nv4NPOLuTYAHg/Wi4lC5K6Z+lyqYh8Tdk9x9VrC8k2jwqh5uVeExsxpAN+C1sGsJm5mVB84FXgdw92R33xZqUeErBpQ2s2JAGWBtyPXkK3efDGzJ1NwDGBYsDwMuyc+awnKwa+HuX7r7gWB1KlAj3wsLySH+bAA8A/wTKFI3kh3ievwdGOju+4J9NuR7YSE4xLVwoFywXJ4i9Ls0i9wVU79LFcxjgJnVApoC00IuJUzPEv1LJBJyHbGgNrAReDMY2vOamR0TdlFhcfc1RHu4VgFJwHZ3/zLcqmJCZXdPCpbXAZXDLCaG/BX4POwiwmRmPYA17j437FpiRD3gD2Y2zcwmmdlZYRcUotuAJ81sNdHfq0Xp26U0mXJXTP0uVTAPmZmVBT4AbnP3HWHXEwYzuwjY4O4zw64lRhQDmgEvu3tT4FeKzjCF3wnG+/Ug+g+WasAxZvancKuKLR6dXqtI9YwejJndT/Tr6uFh1xIWMysD3Ed0mIJEFQOOIzp84R/AKDOzcEsKzd+B2929JnA7wTezRUlWuSsWfpcqmIfIzIoT/cMx3N0/DLueELUFupvZSmAk0NHM/hduSaFKBBLdPfUblPeJBvWiqhPws7tvdPf9wIfA
2SHXFAvWm1lVgOBnkfh6/lDM7C/ARUBfL9rzANch+o/YucHv1BrALDOrEmpV4UoEPvSo6US/mS0yN8RmcjXR36EA7wFF5uZPOGTuiqnfpQrmIQn+tf46sNjdnw67njC5+73uXsPdaxG9qe9rdy+yPaLuvg5YbWb1g6bzgEUhlhS2VUBrMysT/H9zHkX4Zth0xhD9S5bg5+gQawmVmXUlOhSuu7vvDrueMLn7fHc/0d1rBb9TE4Fmwe+VoupjoAOAmdUDSgCbwiwoRGuBdsFyR2BZiLXkqyxyV0z9LtUDhkJiZucA3wLz+W1c9X3uPja8qsJnZu2Bu9z9opBLCZWZNSF6I2wJYAVwjbtvDbWoEJnZI8DlRIcpzAauTb2Rqygws3eA9kR7+dYDDxENG6OAk4BfgN7ufrCbAAuVQ1yLe4GSwOZgt6nufmMoBeazg10Pd3893faVRGc0KhJB9BB/Pt4G3gCaAMlE/475OqQS880hrsVSorN+FQP2Av2KyjDSQ+UuouPMY+Z3qYK5iIiIiEgM0FAWEREREZEYoGAuIiIiIhIDFMxFRERERGKAgrmIiIiISAxQMBcRERERiQEK5iIiITMzN7On0q3fZWYP59K5h5pZz9w4Vzbv08vMFpvZNwfZVs/MxprZMjObZWajzKyymbU3s0+P8P1uC55yKSJSaCiYi4iEbx9wqZnF1NMIzazYYez+N+A6d++Q6RylgM+Al929rrs3A14CKh1lebcBhxXMzSz+KN9TRCRPKZiLiITvADAEuD3zhsw93ma2K/jZ3swmmdloM1thZgPNrK+ZTTez+WZWJ91pOplZgpn9aGYXBcfHm9mTZjbDzOaZ2Q3pzvutmY3hIE+cNbMrg/MvMLNBQduDwDnA62b2ZKZD+gBT3P2T1AZ3n+juCzKd92Ezuyvd+gIzq2Vmx5jZZ2Y2N2i73MxuAaoB36T20JtZZzObEvTIv2dmZYP2lWY2yMxmAb3M7BYzWxR85pHZ/HcREclXh9MbIiIieWcwMM/M/n0Yx5wJNAC2EH1C7Gvu3tLMbgVuJtqrDFALaAnUIRpmTwWuAra7+1lmVhL43sy+DPZvBpzu7j+nfzMzqwYMApoDW4EvzewSd3/UzDoSfaJiQqYaTweO5smCXYG17t4tqKG8u283szuADu6+Kfim4V9AJ3f/1czuBu4AHg3OsTnoqcfM1gK13X2fmVU4irpERHKdesxFRGKAu+8A3gJuOYzDZrh7krvvA34CUoP1fKJhPNUod4+4+zKiAf40oDNwlZnNIfpI6uOBusH+0zOH8sBZwER33+juB4DhwLmHUe+RmA+cH/R6/8Hdtx9kn9ZAQ6L/uJgDXA2cnG77u+mW5wHDzexPRL+pEBGJGQrmIiKx41miY7WPSdd2gOB3tZnFASXSbduXbjmSbj1Cxm9EPdP7OGDAze7eJHjVdvfUYP/r0XyITBYS7WHPTtrnDJQCcPcfifbgzwceC4bNZGbA+HSfpaG7/y3d9vSfpxvRbyeaATMOcxy9iEieUjAXEYkR7r4FGEU0nKdayW/BtjtQ/AhO3cvM4oJx56cAS4EvgL+bWXFImznlmKxOAkwH2pnZCcGNlFcCk7I5ZgRwtpl1S20ws3PN7PRM+60kGpYxs2ZA7WC5GrDb3f8HPJm6D7ATODZYngq0DYboEIxLr5e5kOAfNjXd/RvgbqA8UDab+kVE8o16CkREYstTwE3p1l8FRpvZXGAcR9abvYpoqC4H3Ojue83sNaLDXWaZmQEbgUuyOom7J5nZPcA3RHupP3P30dkcsye44fRZM3sW2E90OMmtQPpZaD4gOrRmIdGhNT8G7Y2BJ80sEhz796B9CDDOzNa6ewcz+wvwTjBeHqJjzn8ko3jgf2ZWPqj/eXffllX9IiL5ydwzf8MpIiIiIiL5TUNZRERERERigIK5iIiIiEgMUDAXEREREYkBCuYiIiIiIjFAwVxEREREJAYomIuIiIiIxAAFcxERERGR
GKBgLiIiIiISA/4fVszhPmhpwOsAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 864x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Candidate cluster counts for the elbow search: 2, 4, ..., 20.\n",
    "num_clusters = range(2, 22, 2)\n",
    "\n",
    "# Inertia (sum of squared distances) recorded for each candidate k.\n",
    "sum_square_error = []\n",
    "\n",
    "# Fit one MiniBatchKMeans model per candidate k and keep its inertia.\n",
    "# X is not defined in this cell; it is expected to come from an earlier cell.\n",
    "for k in num_clusters:\n",
    "    # Announce the fit before it starts so the message reflects work in progress.\n",
    "    print('now fitting {} clusters using  Mini batch K-means algorithm'.format(k))\n",
    "    model = MiniBatchKMeans(n_clusters=k, init_size=1024, batch_size=2048, random_state=42)\n",
    "    sum_square_error.append(model.fit(X).inertia_)\n",
    "\n",
    "# Plot SSE against k; the bend (elbow) in the curve is the usual visual cue\n",
    "# for choosing the number of clusters.\n",
    "plt.figure(figsize=(12, 5))\n",
    "plt.plot(num_clusters, sum_square_error, \"g^-\")\n",
    "plt.xticks(num_clusters)\n",
    "plt.xlabel('Number of Clusters')\n",
    "plt.ylabel(\"Sum of Square Distance\")\n",
    "plt.title('Elbow Method')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "2fb1dbde",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Cluster 1\n",
      "impact,amp,world,global,amid covid,amid,pandemic covid,covid,covid pandemic,pandemic\n",
      "\n",
      "Cluster 2\n",
      "help,year,trump,today,like,school,just,need,health,amp\n",
      "\n",
      "Cluster 3\n",
      "coronavirus,russian,coronavirus vaccine,putin,vaccine covid,trial,russia,covid,covid vaccine,vaccine\n",
      "\n",
      "Cluster 4\n",
      "new zealand,coronavirus,covid new,case covid,new covid,death,covid,case,new case,new\n",
      "\n",
      "Cluster 5\n",
      "news coronavirus,global,coronavirus pandemic,news,covid covid,pandemic,covid,coronavirus covid,covid coronavirus,coronavirus\n",
      "\n",
      "Cluster 6\n",
      "long time,challenging,need,year,people,amp,covid time,covid,time covid,time\n",
      "\n",
      "Cluster 7\n",
      "covid test,death,single,single day,test,india,case,day covid,covid,day\n",
      "\n",
      "Cluster 8\n",
      "slow spread,reporting symptom,sooner,self reporting,sooner self,help slow,case sooner,covid identify,risk case,identify risk\n",
      "\n",
      "Cluster 9\n",
      "covid positive,need,blood,covid,test positive,test,tested positive,tested,positive covid,positive\n",
      "\n",
      "Cluster 10\n",
      "month ago,amp,state covid,case,month covid,united,united state,covid,month,state\n",
      "\n",
      "Cluster 11\n",
      "don,mask covid,covid,face mask,face,wearing mask,wear mask,wearing,wear,mask\n",
      "\n",
      "Cluster 12\n",
      "trump,covid want,just,people,know,want know,don,don want,covid,want\n",
      "\n",
      "Cluster 13\n",
      "case death,india,reported,new covid,new,death,total,covid,covid case,case\n",
      "\n",
      "Cluster 14\n",
      "keeping,home,school,amp,covid safe,covid,safe covid,stay safe,stay,safe\n",
      "\n",
      "Cluster 15\n",
      "need,don,died,people died,amp,young,people covid,young people,covid,people\n",
      "\n",
      "Cluster 16\n",
      "update,need,test,covid death,just,patient,testing,covid covid,death,covid\n"
     ]
    }
   ],
   "source": [
    "def get_top_keywords(data, clusters, labels, n_terms):\n",
    "    '''\n",
    "    Print the highest-scoring terms of every cluster.\n",
    "\n",
    "    For each distinct cluster label, the rows of `data` assigned to that\n",
    "    label are averaged column-wise, and the `n_terms` columns with the\n",
    "    largest mean are printed as one comma-separated line, ordered from the\n",
    "    lowest to the highest mean score.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : 2-D tf-idf matrix, one row per sample and one column per term.\n",
    "        Must support boolean-mask row indexing and `.mean(axis=0)` (for\n",
    "        example a SciPy CSR matrix or a NumPy array). Only the rows of one\n",
    "        cluster are reduced at a time, so the full matrix is never densified.\n",
    "    clusters : sequence of cluster labels, one per row of `data`.\n",
    "    labels : sequence mapping a column index of `data` to its term.\n",
    "    n_terms : number of terms to print per cluster.\n",
    "    '''\n",
    "    clusters = np.asarray(clusters)\n",
    "\n",
    "    # np.unique returns the labels sorted, so clusters print in ascending order.\n",
    "    for cluster_id in np.unique(clusters):\n",
    "        # Column-wise mean over this cluster's rows, flattened to a 1-D array\n",
    "        # (sparse matrices return a 1 x n_features matrix from .mean).\n",
    "        mean_scores = np.asarray(data[clusters == cluster_id].mean(axis=0)).ravel()\n",
    "        top_columns = np.argsort(mean_scores)[-n_terms:]\n",
    "        print('\\nCluster {}'.format(cluster_id + 1))\n",
    "        print(','.join([labels[t] for t in top_columns]))\n",
    "\n",
    "\n",
    "# Assign every row of X to one of 16 clusters.\n",
    "cluster_predictions = MiniBatchKMeans(n_clusters=16, init_size=1024, batch_size=2048, random_state=42).fit_predict(X)\n",
    "\n",
    "# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement\n",
    "# and fall back to the old name on releases that predate it.\n",
    "if hasattr(vectorizer, 'get_feature_names_out'):\n",
    "    feature_names = vectorizer.get_feature_names_out()\n",
    "else:\n",
    "    feature_names = vectorizer.get_feature_names()\n",
    "\n",
    "# Print the 10 strongest terms of each cluster.\n",
    "get_top_keywords(X, cluster_predictions, feature_names, 10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
